Carga de Archivos y metadata.

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6      ✔ purrr   0.3.4 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.2      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(ggstatsplot)
## You can cite this package as:
##      Patil, I. (2021). Visualizations with statistical details: The 'ggstatsplot' approach.
##      Journal of Open Source Software, 6(61), 3167, doi:10.21105/joss.03167
library(haven)
library(phyloseq)
library(ggsci)
library(vegan)
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.6-2
library(uwot)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
library(ggforce)
library(glue)
library(easystats)
## # Attaching packages: easystats 0.5.2.8 (red = needs update)
## ✔ bayestestR  0.13.0    ✖ correlation 0.8.2  
## ✖ datawizard  0.6.0     ✖ effectsize  0.7.0.5
## ✖ insight     0.18.4    ✔ modelbased  0.8.5  
## ✖ performance 0.9.2     ✖ parameters  0.18.2 
## ✔ report      0.5.5.1   ✔ see         0.7.3  
## 
## Restart the R-Session and update packages in red with `easystats::easystats_update()`.
library(ggrepel)
library(gamlss)
## Loading required package: splines
## Loading required package: gamlss.data
## 
## Attaching package: 'gamlss.data'
## 
## The following object is masked from 'package:datasets':
## 
##     sleep
## 
## Loading required package: gamlss.dist
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## Loading required package: nlme
## 
## Attaching package: 'nlme'
## 
## The following object is masked from 'package:dplyr':
## 
##     collapse
## 
## Loading required package: parallel
##  **********   GAMLSS Version 5.4-3  ********** 
## For more on GAMLSS look at https://www.gamlss.com/
## Type gamlssNews() to see new features/changes/bug fixes.
library(emmeans)


source("/storage/megalodon/R/import_kraken.R")
source("/storage/megalodon/R/kraken_heat_tree.R")
setwd("/storage/mariaInsenser/")
files <- dir("./reports/", pattern = "report", full.names = T)


reports <- import_kraken(files,20)
## Loading required package: doParallel
## Loading required package: foreach
## 
## Attaching package: 'foreach'
## The following objects are masked from 'package:purrr':
## 
##     accumulate, when
## Loading required package: iterators
metadata <- read_tsv("LastMetadata.tsv")
## Registered S3 method overwritten by 'bit':
##   method   from  
##   print.ri gamlss
## Rows: 234 Columns: 94
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr  (7): Sample, Position, Run, Code_Sample, Description, GRUPO, Madre
## dbl (87): NumReads, Code_Microbiota, Studio, Orden, Heces, Grupos, Grupo3, D...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
metadata <- metadata %>% mutate(Description = ifelse(grepl("solo",Description),"Destete",Description))

design <- read_tsv("DesignStudy")
## Rows: 12 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (4): Group, Sex, Diet, Treatment
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
table <- reports %>% 
  distinct() %>% 
  inner_join(metadata)
## Joining, by = "Sample"

Quality Control

Distribucion de reads en cada grupo

table %>% mutate(Studio = as.factor(Studio)) %>% 
  filter(Name =="root") %>% 
  ggplot(aes(x=GRUPO, y = Reads, fill = Studio)) + 
  geom_boxplot() + facet_wrap(Studio ~ ., scale = "free_x")

Numero de reasd classificados y no clasificados

table %>% 
  filter(depth ==1) %>% 
  mutate(Name2 = gsub("root","classified",Name2)) %>% 
  distinct() %>%
  ggplot(aes(x=Sample, y = Prop, fill = Name2)) + 
  geom_col() +
  coord_flip() + scale_fill_d3()

Grupo 1

1. A nivel madres gestantes (Un Ăºnico factor: Tratamiento = tres grupos)
    â—¦ Control (sin inyecciĂ³n de testosterona) (n=4)
    â—¦ 5 mgr (inyecciĂ³n de 5 mgr testosterona) (n=5)
    â—¦ 20 mgr (inyecciĂ³n de 20 mgr testosterona) (n=5)

Existe diferencia en la microbiota de las madres teniendo en cuenta el factor tratamiento? alpha y beta diversidad. • Hay discriminaciĂ³n entre grupos (la androgenizaciĂ³n influye en la microbiota)? • DĂ³nde estĂ¡ la diferencia? Control vs 5mg vs 20mg vs Control.

Creamos la OTU-table para resolver la pregunta.

samplingSize <- table %>% 
  filter(grepl("G",TaxRank)) %>% 
  filter(Studio ==2) %>% 
  group_by(Sample) %>% 
  summarise(TotalReads = sum(Reads)) %>%
  slice_min(TotalReads)

otu_table_q1 <- table %>% 
  filter(Studio ==2) %>% 
  filter(grepl("G",TaxRank)) %>% 
  dplyr::select(Sample,Reads,NCBI) %>% 
  pivot_wider(names_from = Sample,
              values_from = Reads,
              values_fill =0) %>% 
  column_to_rownames("NCBI") %>% 
  as.matrix() %>% 
  t() %>% 
  rrarefy(sample = samplingSize$TotalReads) %>% 
  t()

Alpha Diversidad

Calculamos la alpha diversity

simpson <- diversity(otu_table_q1, MARGIN = 2, index = "simpson") %>% 
  as.data.frame() %>%
  rownames_to_column("Sample") %>%
  rename(Simpson =2)
shannon <- diversity(otu_table_q1, MARGIN = 2,index = "shannon" ) %>% 
  as.data.frame() %>% 
  rownames_to_column("Sample") %>% 
  rename(Shannon = 2)

div <- inner_join(simpson, shannon) %>% 
  pivot_longer(names_to = "DiversityIndex",values_to = "DiversityValue", -Sample) %>% 
  inner_join(metadata)
## Joining, by = "Sample"
## Joining, by = "Sample"
div %>% 
  ggplot(aes(x = NumReads, y = DiversityValue)) + 
  geom_point() + 
  facet_wrap(.~DiversityIndex, scales = "free")

div %>% 
  filter(DiversityIndex == "Shannon") %>% 
  pull(DiversityValue) %>% 
  shapiro.test()
## 
##  Shapiro-Wilk normality test
## 
## data:  .
## W = 0.93032, p-value = 0.3083
div %>% 
  filter(DiversityIndex == "Simpson") %>% 
  pull(DiversityValue) %>% 
  shapiro.test()
## 
##  Shapiro-Wilk normality test
## 
## data:  .
## W = 0.94282, p-value = 0.4557
div %>% 
  filter(DiversityIndex =="Shannon") %>% 
  ggbetweenstats(x = GRUPO, y = DiversityValue, type = "robust")

div %>% 
  filter(DiversityIndex =="Simpson") %>% 
  ggbetweenstats(x = GRUPO, y = DiversityValue, type = "robust")

No parece que haya diferencias entre la alpha diverisdad de los grupos.

Beta Diversidad

Vamos a calcular la Beta-diversidad.

otu_table_q1 <- table %>% 
  filter(Studio ==2) %>% 
  filter(grepl("G",TaxRank)) %>% 
  dplyr::select(Sample,Reads,NCBI) %>% 
  pivot_wider(names_from = Sample,
              values_from = Reads,
              values_fill =0) %>% 
  column_to_rownames("NCBI") %>% 
  as.matrix() %>% t()

dist_q1 <- avgdist(otu_table_q1,sample = samplingSize$TotalReads, dmethod = "bray") %>% 
  as.matrix() %>% 
  as_tibble(rownames = "Sample") %>% 
  inner_join(metadata)
## Joining, by = "Sample"
pheatmap::pheatmap(dist_q1 %>% 
                     dplyr::select(Sample:H3_S24.run1.report) %>% 
                     column_to_rownames("Sample") %>% 
                     as.matrix() %>% 
                     as.dist(), 
                   annotation_row = dist_q1 %>% 
                     dplyr::select(Sample,GRUPO) %>% 
                     column_to_rownames("Sample"))

dist_ad <-dist_q1 %>% 
  dplyr::select(Sample:H3_S24.run1.report) %>% 
  column_to_rownames("Sample") %>% 
  as.matrix() %>% 
  as.dist() 

adonis2(dist_ad ~ GRUPO, dist_q1 %>% 
                     dplyr::select(Sample,GRUPO) %>% 
                     column_to_rownames("Sample"))

Podemos concluir que a nivel metagenomico no existen diferencias ni en alpha diversidad ni en beta diversidad entre los grupos de madres.

AnĂ¡lisis diferencial

• QuĂ© taxas son las responsables de las diferencias (la androgenizaciĂ³n afecta a determinadas taxas)? CuĂ¡les discriminan mĂ¡s? • Es dosis dependiente? Ej: Control < 5mg < 20mg o al revĂ©s para determinados phylum o genera.

samplingSize <- table %>% 
  filter(Studio ==2) %>% 
  filter(grepl("G",TaxRank)) %>% 
  group_by(Sample) %>% 
  summarise(TotalReads = sum(Reads)) %>%
  slice_min(TotalReads)

otu_table_q1 <- table %>% 
  filter(Studio ==2) %>% 
  filter(grepl("G",TaxRank)) %>% 
  dplyr::select(Sample,Reads,NCBI) %>% 
  pivot_wider(names_from = NCBI,
              values_from = Reads,
              values_fill =0) %>% 
  as.data.frame() %>% 
  column_to_rownames("Sample") %>% 
  as.matrix() %>% 
  rrarefy(sample = samplingSize$TotalReads) %>% 
  as_tibble(rownames = "Sample") %>% 
  pivot_longer(names_to = "NCBI", values_to = "Reads",-Sample) %>% 
  inner_join(metadata) %>% 
  mutate(GRUPO = as.factor(GRUPO))
## Joining, by = "Sample"
gamlss_q1 <- otu_table_q1 %>% 
  group_by(Sample) %>%
  mutate(TotalReads = sum(Reads), NSpecies = sum(Reads>0)) %>%
  ungroup() %>%
  group_by(NCBI) %>%
  mutate(Nzeros = sum(Reads ==0)) %>%
  ungroup() %>%
  mutate(NSamples = n_distinct(Sample)) %>%
  mutate(Freq = (Reads+1)/TotalReads) %>%
  mutate(ZeroFreq = Nzeros/NSamples) %>% 
  filter(ZeroFreq < 0.9) %>% 
  dplyr::select(NCBI,Reads,Freq,ZeroFreq,GRUPO,Sample) %>% 
  mutate(Reads = Reads+1) %>% 
  nest_by(NCBI) %>% 
  mutate(model = list(gamlss(formula = Freq ~ GRUPO, 
                             data = data, 
                             family = "BE", 
                             control = gamlss.control(n.cyc = 200, trace = F)))) %>% 
  mutate(test = list(emmeans(model, specs = "GRUPO", data = data) %>% 
                       pairs() %>% 
                       as.data.frame()))

gamlss_q1 %>% 
  dplyr::select(NCBI,test) %>% 
  unnest(test) %>% 
  mutate(final_pvalue = p.adjust(p.value,method = "BH")) %>% 
  inner_join(table %>% 
               dplyr::select(NCBI,Name2) %>% 
               mutate(NCBI = as.character(NCBI)) %>% 
               distinct()) %>% 
  dplyr::select(NCBI,Name = Name2,contrast,z.ratio,final_pvalue)
## Joining, by = "NCBI"
gamlss_q1 %>% 
  dplyr::select(NCBI,test) %>% 
  unnest(test) %>% 
  mutate(final_pvalue = p.adjust(p.value,method = "BH")) %>% 
  inner_join(table %>% 
               dplyr::select(NCBI,Name2) %>% 
               mutate(NCBI = as.character(NCBI)) %>% 
               distinct()) %>% 
  dplyr::select(NCBI,Name = Name2,contrast,z.ratio,final_pvalue) %>% 
  separate(contrast, c("A","B"), sep = " - ") %>% 
  mutate(sig = ifelse(final_pvalue < 0.05,"Significative","No-Significative")) %>%
  mutate(label = ifelse(sig == "Significative",Name,NA)) %>% 
  ggplot(aes(x= z.ratio, y = -log10(final_pvalue), color = sig, label = label)) +
  geom_point(alpha = 0.5) +
  geom_text_repel(size = 4)+
  scale_color_d3() +
  theme_light() + facet_wrap(A ~ B)
## Joining, by = "NCBI"
## Warning: Removed 2024 rows containing missing values (geom_text_repel).
## Warning: ggrepel: 15 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 19 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 11 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Cuestion 2

A nivel ratas en el destete (Dos factores: Tratamiento y Sexo)
    â—¦ Control (51: 32 machos, 19 hembras)
    â—¦ 5 mgr (55: 37 machos, 18 hembras)
    â—¦ 20 mgr (12: 7 machos, 5 hembras)

Vamos a preparar la tabla

samplingSize <- table %>% 
  filter(grepl("G",TaxRank)) %>% 
  filter(grepl("Destete",Description)) %>% 
  group_by(Sample) %>% 
  summarise(TotalReads = sum(Reads)) %>%
  slice_min(TotalReads)

otu_table_q2 <- table %>% 
  filter(grepl("G",TaxRank)) %>% 
  filter(grepl("Destete",Description)) %>% 
  dplyr::select(Sample,Reads,NCBI) %>% 
  pivot_wider(names_from = Sample,
              values_from = Reads,
              values_fill =0) %>% 
  column_to_rownames("NCBI") %>% 
  as.matrix() %>% t() %>% rrarefy(sample = samplingSize$TotalReads) %>% t()

Preparamos el metadata con las variables de tratamiento (Control , 5mg y 20mg) y sexo

meta_q2 <- metadata %>%
  filter(grepl("Destete",Description)) %>%
  mutate(Tratamiento = ifelse(Description == "Destete 20mg","20mg",Tratamiento)) %>% 
  mutate(Tratamiento = ifelse(Tratamiento == "0","Contro",Tratamiento)) %>% 
  mutate(Tratamiento = ifelse(Tratamiento == "1","5mg",Tratamiento)) %>% 
  dplyr::select(Sample,Tratamiento,Sexo,NumReads)

Existe diferencia en la microbiota de la progenie teniendo en cuenta el factor tratamiento? Y teniendo en cuenta el sexo? alpha y beta diversidad. ¿QuĂ© grupos se parecen mĂ¡s? (Eso se puede ver representando en la matriz de distancias los diferentes grupos y ver cuĂ¡les estĂ¡n mĂ¡s o menos cerca?). DĂ³nde estĂ¡n las diferencias (significaciĂ³n estadĂ­stica) y quĂ© taxas son las que discriminan entre los grupos?

Alpha Diversidad

Calculamos la alpha diversidad.

simpson <- diversity(otu_table_q2, MARGIN = 2, index = "simpson") %>% 
  as.data.frame() %>%
  rownames_to_column("Sample") %>%
  rename(Simpson =2)
shannon <- diversity(otu_table_q2, MARGIN = 2,index = "shannon" ) %>% 
  as.data.frame() %>% 
  rownames_to_column("Sample") %>% 
  rename(Shannon = 2)

div <- inner_join(simpson, shannon) %>% 
  pivot_longer(names_to = "DiversityIndex",values_to = "DiversityValue", -Sample) %>% 
  inner_join(meta_q2)
## Joining, by = "Sample"
## Joining, by = "Sample"
div %>% 
  ggplot(aes(x = NumReads, y = DiversityValue)) + 
  geom_point() + 
  facet_wrap(.~DiversityIndex, scales = "free")

div %>% filter(DiversityIndex =="Shannon") %>% ggbetweenstats(x = Tratamiento, y = DiversityValue, type = "robust")

div %>% filter(DiversityIndex =="Simpson") %>% ggbetweenstats(x = Tratamiento, y = DiversityValue, type = "robust")

Segun los test estadisticos podemos concluir que si existen diferencias en alpha diversidad entre los grupos.

Beta diversidad

Vamos a calcular la Beta-diversidad.

otu_table_q2 <- table %>% 
  filter(grepl("G",TaxRank)) %>% 
  filter(grepl("Destete",Description)) %>% 
  dplyr::select(Sample,Reads,NCBI) %>% 
  pivot_wider(names_from = Sample,
              values_from = Reads,
              values_fill =0) %>% 
  column_to_rownames("NCBI") %>% 
  as.matrix() %>% t()

dist_q2 <- avgdist(otu_table_q2,sample = samplingSize$TotalReads, dmethod = "bray") %>% 
  as.matrix() %>% 
  as_tibble(rownames = "Sample") %>% 
  inner_join(meta_q2)
## Joining, by = "Sample"
pheatmap::pheatmap(dist_q2 %>% dplyr::select(Sample,ends_with("report")) %>% column_to_rownames("Sample") %>% as.matrix() %>% as.dist(), 
                   annotation_row = dist_q2 %>% 
                     dplyr::select(Sample,Tratamiento,Sexo) %>% 
                     column_to_rownames("Sample"))

dist_ad <-dist_q2 %>% dplyr::select(Sample, ends_with("report")) %>% column_to_rownames("Sample") %>% as.matrix() %>% as.dist()

adonis2(dist_ad ~ Tratamiento*Sexo, dist_q2 %>% 
                     dplyr::select(Sample,Tratamiento,Sexo) %>% 
                     column_to_rownames("Sample"), permutations = 1000)

Parece que existe una diferencia composicional entre los grupos de tratamiento pero no entre el sexo.

Analisis diferencial

samplingSize <- table %>% 
  filter(grepl("G",TaxRank)) %>% 
  filter(grepl("Destete",Description)) %>% 
  group_by(Sample) %>% 
  summarise(TotalReads = sum(Reads)) %>%
  slice_min(TotalReads)

otu_table_q2 <- table %>% 
  filter(grepl("G",TaxRank)) %>% 
  filter(grepl("Destete",Description)) %>% 
  dplyr::select(Sample,Reads,NCBI) %>% 
  pivot_wider(names_from = NCBI,
              values_from = Reads,
              values_fill =0) %>% 
  as.data.frame() %>% 
  column_to_rownames("Sample") %>% 
  as.matrix() %>% 
  rrarefy(sample = samplingSize$TotalReads) %>% 
  as_tibble(rownames = "Sample") %>% 
  pivot_longer(names_to = "NCBI", values_to = "Reads",-Sample) %>% 
  inner_join(metadata) %>% 
  mutate(Tratamiento = ifelse(Tratamiento == "0","Control",Tratamiento)) %>% 
  mutate(Tratamiento = ifelse(Tratamiento == "1","5mg",Tratamiento)) %>%
  mutate(Tratamiento = ifelse(is.na(Tratamiento),"20mg",Tratamiento)) %>%
  mutate(Sexo = fct_recode(as.factor(Sexo),"Hembra" ="0","Macho" ="1"))
## Joining, by = "Sample"
gamlss_q2 <- otu_table_q2 %>% 
  group_by(Sample) %>%
  mutate(TotalReads = sum(Reads), NSpecies = sum(Reads>0)) %>%
  ungroup() %>%
  group_by(NCBI) %>%
  mutate(Nzeros = sum(Reads ==0)) %>%
  ungroup() %>%
  mutate(NSamples = n_distinct(Sample)) %>%
  mutate(Freq = (Reads+1)/TotalReads) %>%
  mutate(ZeroFreq = Nzeros/NSamples) %>% 
  filter(ZeroFreq < 0.9) %>% 
  dplyr::select(NCBI,Reads,Freq,ZeroFreq,Tratamiento,Sexo,Sample) %>% 
  mutate(Reads = Reads+1) %>% 
  nest_by(NCBI) %>% 
  mutate(model = list(gamlss(formula = Freq ~ Tratamiento + Sexo, 
                             data = data, 
                             family = "BE", 
                             control = gamlss.control(n.cyc = 200, trace = F)))) %>% mutate(test = list(emmeans(model, specs = "Tratamiento", data = data) %>% pairs() %>% as.data.frame()))

gamlss_q2 %>% 
  dplyr::select(NCBI,test) %>% 
  unnest(test) %>% 
  mutate(final_pvalue = p.adjust(p.value,method = "BH")) %>% 
  inner_join(table %>% 
               dplyr::select(NCBI,Name2) %>% 
               mutate(NCBI = as.character(NCBI)) %>% 
               distinct()) %>% 
  dplyr::select(NCBI,Name = Name2,contrast,z.ratio,final_pvalue)
## Joining, by = "NCBI"
gamlss_q2 %>% 
  dplyr::select(NCBI,test) %>% 
  unnest(test) %>% 
  mutate(final_pvalue = p.adjust(p.value,method = "BH")) %>% 
  inner_join(table %>% 
               dplyr::select(NCBI,Name2) %>% 
               mutate(NCBI = as.character(NCBI)) %>% 
               distinct()) %>% 
  dplyr::select(NCBI,Name = Name2,contrast,z.ratio,final_pvalue) %>% 
  separate(contrast, c("A","B"), sep = " - ") %>% 
  mutate(sig = ifelse(final_pvalue < 0.01,"Significative","No-Significative")) %>%
  mutate(label = ifelse(sig == "Significative",Name,NA)) %>% 
  ggplot(aes(x= z.ratio, y = -log10(final_pvalue), color = sig, label = label)) +
  geom_point(alpha = 0.5) +
  geom_text_repel(size = 4)+
  scale_color_d3() +
  theme_light() + facet_wrap(A ~ B)
## Joining, by = "NCBI"
## Warning: Removed 2026 rows containing missing values (geom_text_repel).
## Warning: ggrepel: 6 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Cuestion 3

A nivel ratas adultas [Factores: Tratamiento, Dieta, y Grupo (machos, hembras y machos castrados)]

Vamos a preparar la tabla

samplingSize <- table %>% 
  filter(grepl("G",TaxRank)) %>% 
  filter(grepl("Adulta",Description)) %>% 
  group_by(Sample) %>% 
  summarise(TotalReads = sum(Reads)) %>%
  slice_min(TotalReads)

otu_table_q3 <- table %>% 
  filter(grepl("G",TaxRank)) %>% 
  filter(grepl("Adulta",Description)) %>% 
  dplyr::select(Sample,Reads,NCBI) %>% 
  pivot_wider(names_from = Sample,
              values_from = Reads,
              values_fill =0) %>% 
  column_to_rownames("NCBI") %>% 
  as.matrix() %>% t() %>% rrarefy(sample = samplingSize$TotalReads) %>% t()

Preparamos el metadata con las variables de tratamiento (Control , 5mg y 20mg) y sexo

meta_q3 <- metadata %>%
  filter(grepl("Adulta",Description)) %>%
  mutate(Tratamiento = ifelse(Tratamiento == "0","Control",Tratamiento)) %>% 
  mutate(Tratamiento = ifelse(Tratamiento == "1","5mg",Tratamiento)) %>% 
  mutate(Dieta = ifelse(Dieta ==0,"Dieta_No","Dieta_Si")) %>% 
  mutate(Grupo = fct_recode(as.factor(Sexo + Castrado),"Hembra" ="0","Macho" ="1", "Macho_Castrado" ="2" )) %>% 
  dplyr::select(Sample,Tratamiento,Sexo,Dieta,Castrado,NumReads,Grupo,Madre,NºJaula) %>% mutate_if(is.character, as.factor)

Alpha Diversidad

Calculamos la alpha diversidad.

simpson <- diversity(otu_table_q3, MARGIN = 2, index = "simpson") %>% 
  as.data.frame() %>%
  rownames_to_column("Sample") %>%
  rename(Simpson =2)
shannon <- diversity(otu_table_q3, MARGIN = 2,index = "shannon" ) %>% 
  as.data.frame() %>% 
  rownames_to_column("Sample") %>% 
  rename(Shannon = 2)

div <- inner_join(simpson, shannon) %>% 
  pivot_longer(names_to = "DiversityIndex",values_to = "DiversityValue", -Sample) %>% 
  inner_join(meta_q3) %>% 
  mutate_if(is.character, as.factor) %>% 
  mutate(Dieta = fct_recode(as.factor(Dieta),"Dieta Si" = "1", "Dieta No" ="0"))
## Joining, by = "Sample"
## Joining, by = "Sample"
## Warning: Unknown levels in `f`: 1, 0
div %>% 
  ggplot(aes(x = NumReads, y = DiversityValue)) + 
  geom_point() + 
  facet_wrap(.~DiversityIndex, scales = "free")

div %>% ggplot(aes(x=Grupo, y = DiversityValue, fill = Tratamiento)) + geom_boxplot() + facet_grid(DiversityIndex ~ Dieta, scales = "free_y") 

div %>% filter(DiversityIndex =="Shannon") %>% ggbetweenstats(x = Tratamiento, y = DiversityValue, type = "bayes",title = "Shannon Diversity in Treatment")

div %>% filter(DiversityIndex =="Simpson") %>% ggbetweenstats(x = Tratamiento, y = DiversityValue, type = "bayes",title = "Simpson Diversity in Treatment")

div %>% filter(DiversityIndex =="Shannon") %>% ggbetweenstats(x = Grupo, y = DiversityValue, type = "bayes",title = "Shannon Diversity in Group")

div %>% filter(DiversityIndex =="Simpson") %>% ggbetweenstats(x = Grupo, y = DiversityValue, type = "bayes",title = "Simpson Diversity in Group")

div %>% filter(DiversityIndex =="Shannon") %>% ggbetweenstats(x = Dieta, y = DiversityValue, type = "bayes",title = "Shannon Diversity in Dieta")

div %>% filter(DiversityIndex =="Simpson") %>% ggbetweenstats(x = Dieta, y = DiversityValue, type = "bayes",title = "Simpson Diversity in Dieta")

Vamos a tratar de realizar un ANOVA o Kruskal-Wallis de las alpha diversity.

input_table <- div %>% 
  dplyr::select(Sample,DiversityIndex,DiversityValue,Tratamiento,Grupo,Dieta) %>% 
  pivot_wider(names_from = DiversityIndex, values_from = DiversityValue) %>% mutate_if(is.character, as.factor)

Modelos ANOVA sobre el indice Shannon

input_table %>% rstatix::anova_test(Shannon ~ Dieta * Tratamiento * Grupo) 
## Coefficient covariances computed by hccm()

Lo resultados parecen descartar cualquier interacciĂ³n, por lo que realizamos el anĂ¡lisis pos-hoc sin interacciones.

input_table %>% rstatix::tukey_hsd(Shannon ~ Grupo + Dieta + Tratamiento)

Modelos GLM sobre el indice Simpson (Simpson no es parametrico)

mod <- gamlss(Simpson ~ Dieta + Tratamiento + Grupo, data = input_table, family ="GA")
## GAMLSS-RS iteration 1: Global Deviance = -150.8023 
## GAMLSS-RS iteration 2: Global Deviance = -150.8023
summary(mod)
## ******************************************************************
## Family:  c("GA", "Gamma") 
## 
## Call:  gamlss(formula = Simpson ~ Dieta + Tratamiento + Grupo,  
##     family = "GA", data = input_table) 
## 
## Fitting method: RS() 
## 
## ------------------------------------------------------------------
## Mu link function:  log
## Mu Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         -0.281534   0.032431  -8.681 8.76e-14 ***
## DietaDieta_Si        0.069622   0.029833   2.334   0.0217 *  
## TratamientoControl  -0.041804   0.029858  -1.400   0.1646    
## GrupoMacho          -0.002112   0.036282  -0.058   0.9537    
## GrupoMacho_Castrado  0.046875   0.036285   1.292   0.1994    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## ------------------------------------------------------------------
## Sigma link function:  log
## Sigma Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.88579    0.06907   -27.3   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## ------------------------------------------------------------------
## No. of observations in the fit:  104 
## Degrees of Freedom for the fit:  6
##       Residual Deg. of Freedom:  98 
##                       at cycle:  2 
##  
## Global Deviance:     -150.8023 
##             AIC:     -138.8023 
##             SBC:     -122.936 
## ******************************************************************
estimate_contrasts(mod, contrast = c("Dieta","Tratamiento","Grupo"))

Vamos a calcular la Beta-diversidad.

samplingSize <- table %>% 
  filter(grepl("G",TaxRank)) %>% 
  filter(grepl("Adulta",Description)) %>% 
  group_by(Sample) %>% 
  summarise(TotalReads = sum(Reads)) %>%
  slice_min(TotalReads)

otu_table_q3 <- table %>% 
  filter(grepl("G",TaxRank)) %>% 
  filter(grepl("Adulta",Description)) %>% 
  dplyr::select(Sample,Reads,NCBI) %>% 
  pivot_wider(names_from = Sample,
              values_from = Reads,
              values_fill =0) %>% 
  column_to_rownames("NCBI") %>% 
  as.matrix() %>% t()
dist_q3 <- avgdist(otu_table_q3,sample = samplingSize$TotalReads, dmethod = "bray") %>% 
  as.matrix() %>% 
  as_tibble(rownames = "Sample") %>% 
  inner_join(meta_q3)
## Joining, by = "Sample"
pheatmap::pheatmap(dist_q3 %>% 
                     dplyr::select(Sample,ends_with("report")) %>% 
                     column_to_rownames("Sample") %>% 
                     as.matrix() %>% 
                     as.dist(), 
                   annotation_row = dist_q3 %>% 
                     dplyr::select(Sample,Tratamiento,Grupo,Dieta, Madre, NºJaula) %>%
                     mutate(NºJaula = as.factor(NºJaula)) %>% 
                     column_to_rownames("Sample"))

dist_ad <-dist_q3 %>% 
  dplyr::select(Sample, ends_with("report")) %>% 
  column_to_rownames("Sample") %>%
  as.matrix() %>%  
  as.dist()

adonis2(dist_ad ~ Tratamiento*Grupo*Dieta*Madre*NºJaula , dist_q3 %>% 
                     dplyr::select(Sample,Tratamiento,Grupo,Dieta,Madre,NºJaula) %>%
          mutate(NºJaula = as.factor(NºJaula)) %>% 
                     column_to_rownames("Sample"), permutations = 1000)

Como parece que no existen interacciones repetimos el analisis sin interacciones

adonis2(dist_ad ~ Tratamiento+Grupo+Dieta+Madre+NºJaula , dist_q3 %>% 
                     dplyr::select(Sample,Tratamiento,Grupo,Dieta,Madre,NºJaula) %>% 
          mutate(NºJaula = as.factor(NºJaula)) %>% 
                     column_to_rownames("Sample"), permutations = 1000)

Test diferencial

samplingSize <- table %>% 
  filter(grepl("G",TaxRank)) %>% 
  filter(grepl("Adulta",Description)) %>% 
  group_by(Sample) %>% 
  summarise(TotalReads = sum(Reads)) %>%
  slice_min(TotalReads)

otu_table_q3 <- table %>% 
  filter(grepl("G",TaxRank)) %>% 
  filter(grepl("Adulta",Description)) %>% 
  dplyr::select(Sample,Reads,NCBI) %>% 
  pivot_wider(names_from = NCBI,
              values_from = Reads,
              values_fill =0) %>% 
  as.data.frame() %>% 
  column_to_rownames("Sample") %>% 
  as.matrix() %>% 
  rrarefy(sample = samplingSize$TotalReads) %>% 
  as_tibble(rownames = "Sample") %>% 
  pivot_longer(names_to = "NCBI", values_to = "Reads",-Sample) %>% 
  inner_join(metadata) %>% 
  mutate(Dieta = ifelse(Dieta == 1, "Dieta_Si","Dieta_No")) %>% 
  mutate(Tratamiento = ifelse(Tratamiento == "0","Control",Tratamiento)) %>% 
  mutate(Tratamiento = ifelse(Tratamiento == "1","5mg",Tratamiento)) %>% 
  mutate(Grupo = fct_recode(as.factor(Sexo + Castrado),"Hembra" ="0","Macho" ="1", "Macho_Castrado" ="2" ))
## Joining, by = "Sample"
gamlss_q3 <- otu_table_q3 %>% 
  group_by(Sample) %>%
  mutate(TotalReads = sum(Reads), NSpecies = sum(Reads>0)) %>%
  ungroup() %>%
  group_by(NCBI) %>%
  mutate(Nzeros = sum(Reads ==0)) %>%
  ungroup() %>%
  mutate(NSamples = n_distinct(Sample)) %>%
  mutate(Freq = (Reads+1)/TotalReads) %>%
  mutate(ZeroFreq = Nzeros/NSamples) %>% 
  filter(ZeroFreq < 0.9) %>% 
  dplyr::select(NCBI,Reads,Freq,ZeroFreq,Dieta,Grupo,Tratamiento,Sample) %>% 
  mutate(Reads = Reads+1) %>% 
  nest_by(NCBI) %>% 
  mutate(model = list(gamlss(formula = Freq ~ Dieta + Grupo+ Tratamiento, 
                             data = data, 
                             family = "BE", 
                             control = gamlss.control(n.cyc = 200, trace = F))))
   
gamlss_q3 %>% 
  mutate(test = list(emmeans(model, specs = "Dieta", data = data) %>% 
                       pairs() %>% 
                       as.data.frame())) %>% 
  dplyr::select(NCBI,test) %>% 
  unnest(test) %>% 
  mutate(final_pvalue = p.adjust(p.value,method = "BH")) %>% 
  inner_join(table %>% 
               dplyr::select(NCBI,Name2) %>% 
               mutate(NCBI = as.character(NCBI)) %>% 
               distinct()) %>% 
  dplyr::select(NCBI,Name = Name2,contrast,z.ratio,final_pvalue) %>% 
  separate(contrast, c("A","B"), sep = " - ") %>% 
  mutate(sig = ifelse(final_pvalue < 0.001,"Significative","No-Significative")) %>%
  mutate(label = ifelse(sig == "Significative",Name,NA)) %>% 
  ggplot(aes(x= z.ratio, y = -log10(final_pvalue), color = sig, label = label)) +
  geom_point(alpha = 0.5) +
  geom_text_repel(size = 4)+
  scale_color_d3() +
  theme_light() + facet_wrap(A ~ B)
## Joining, by = "NCBI"
## Warning: Removed 614 rows containing missing values (geom_text_repel).
## Warning: ggrepel: 58 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## Test a todos los rangos

samplingSize <- table %>% 
  filter(TaxRank == "R") %>% 
  filter(grepl("Adulta",Description)) %>% 
  group_by(Sample) %>% 
  summarise(TotalReads = sum(Reads)) %>%
  slice_min(TotalReads)

otu_table_q3 <- table %>% 
  filter(grepl("Adulta",Description)) %>% 
  dplyr::select(Sample,Reads,NCBI) %>% 
  pivot_wider(names_from = NCBI,
              values_from = Reads,
              values_fill =0) %>% 
  as.data.frame() %>% 
  column_to_rownames("Sample") %>% 
  as.matrix() %>% 
  rrarefy(sample = samplingSize$TotalReads) %>% 
  as_tibble(rownames = "Sample") %>% 
  pivot_longer(names_to = "NCBI", values_to = "Reads",-Sample) %>% 
  inner_join(metadata) %>% 
  mutate(Dieta = ifelse(Dieta == 1, "Dieta_Si","Dieta_No")) %>% 
  mutate(Tratamiento = ifelse(Tratamiento == "0","Control",Tratamiento)) %>% 
  mutate(Tratamiento = ifelse(Tratamiento == "1","5mg",Tratamiento)) %>% 
  mutate(Grupo = fct_recode(as.factor(Sexo + Castrado),"Hembra" ="0","Macho" ="1", "Macho_Castrado" ="2" ))
## Joining, by = "Sample"
samplingSize <- table %>% 
  filter(TaxRank == "R") %>% 
  filter(grepl("Adulta",Description)) %>% 
  rename(TotalReads = Reads) %>% 
  group_by(Sample) %>% 
  slice_max(TotalReads) %>% 
  dplyr::select(Sample, TotalReads)


gamlss_q3 <- otu_table_q3 %>% 
  inner_join(samplingSize) %>% 
  group_by(Sample) %>%
  mutate(NSpecies = sum(Reads>0)) %>%
  ungroup() %>%
  group_by(NCBI) %>%
  mutate(Nzeros = sum(Reads ==0)) %>%
  ungroup() %>%
  mutate(NSamples = n_distinct(Sample)) %>%
  mutate(Freq = (Reads+1)/TotalReads) %>%
  mutate(ZeroFreq = Nzeros/NSamples) %>% 
  filter(ZeroFreq < 0.9) %>% 
  dplyr::select(NCBI,Reads,Freq,ZeroFreq,Dieta,Grupo,Tratamiento,Sample) %>% 
  mutate(Reads = Reads+1) %>% 
  nest_by(NCBI) %>% 
  mutate(model = list(gamlss(formula = Freq ~ Dieta + Grupo+ Tratamiento, 
                             data = data, 
                             family = "BE", 
                             control = gamlss.control(n.cyc = 200, trace = F))))
## Joining, by = "Sample"
gamlss_q3 %>% 
  mutate(test = list(emmeans(model, specs = "Dieta", data = data) %>% 
                       pairs() %>% 
                       as.data.frame())) %>% 
  dplyr::select(NCBI,test) %>% 
  unnest(test) %>% 
  mutate(final_pvalue = p.adjust(p.value,method = "BH")) %>% 
  inner_join(table %>% 
               dplyr::select(NCBI,Name2,TaxRank) %>% 
               mutate(NCBI = as.character(NCBI)) %>% 
               distinct()) %>% 
  dplyr::select(NCBI,TaxRank,Name = Name2,contrast,z.ratio,final_pvalue) %>% 
  separate(contrast, c("A","B"), sep = " - ") %>% 
  mutate(sig = ifelse(final_pvalue < 0.001,"Significative","No-Significative")) %>%
  mutate(label = ifelse(sig == "Significative",Name,NA)) %>% 
  ggplot(aes(x= z.ratio, y = -log10(final_pvalue), color = sig, label = label)) +
  geom_point(alpha = 0.5) +
  geom_text_repel(size = 4)+
  scale_color_d3() +
  theme_light() + facet_wrap(A ~ B)
## Joining, by = "NCBI"
## Warning: Removed 615 rows containing missing values (geom_text_repel).
## Warning: ggrepel: 75 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Nuevo Aproach

Cuestion 1: Machos vs Hembras (Dieta NO, Tratamiento Control, Studio 3, Castrados no) formula: ~ Sexo

Cuestion 2: Machos vs Hembras (Dieta SI/NO, Tratamiento Control, Studio 3, Castrados no) formula: ~Sexo * Dieta

Cuestion 3.1: Machos vs Machos Castrados (Dieta NO, Tratamiento Control, Studio 3, Hembras NO) formula: ~Castrado

Cuestion 3.2: Hembras Tratamiento control vs Hembras Tratamiento (Dieta NO, Studio3, Machos NO) forumula: ~Tratamiento

Cuestion 4.1: Machos vs Machos Castrados (Dieta SI/NO, Tratamiento Control, Studio 3, Hembras NO) formula: ~Castrado * Dieta

Cuestion 4.2: Hembras Tratamiento control vs Hembras Tratamiento (Dieta SI/NO, Studio3, Machos NO) forumula: ~Tratamiento * Dieta

Cuestion 5.1: Crear nueva variable. Disfuncion gonadal (SI <- Machos Castrados, Hembras Tratamiento Si, El resto NO) Sampling: Hembras Dieta NO, Tratameinto SI/NO Machos Dieta NO, Tratamiento NO Machos Castrados, Dieta NO, Tratamiento NO formula = ~ Sexo * Disfuncion Gonadal

Cuestion 5.2 Crear nueva variable. Disfuncion gonadal (SI <- Machos Castrados, Hembras Tratamiento Si, El resto NO) Sampling: Hembras Dieta SI/NO, Tratameinto SI/NO Machos Dieta SI/NO, Tratamiento NO Machos Castrados, Dieta SI/NO, Tratamiento NO formula = ~ Sexo * Disfuncion Gonadal * Dieta